; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test memset 0 with variable length
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; fun0: zero-fill %Addr for a run-time i64 length %Len.
; Expected assembly, in order:
;   - aghi computes Len - 1 in %r3; cgibe returns straight away when that
;     value is -1, i.e. when Len was 0.
;   - srlg by 8 puts the number of full 256-byte blocks in %r0; when it is
;     non-zero, the brctg loop clears one block per iteration with a
;     256-byte xc and advances %r2 by 256.
;   - exrl executes the out-of-line xc at .Ltmp0 with %r3 (Len - 1) to deal
;     with the bytes left over after the loop.
define void @fun0(ptr %Addr, i64 %Len) {
; CHECK-LABEL: fun0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r3, -1
; CHECK-NEXT:    cgibe %r3, -1, 0(%r14)
; CHECK-NEXT:  .LBB0_1:
; CHECK-NEXT:    srlg %r0, %r3, 8
; CHECK-NEXT:    cgije %r0, 0, .LBB0_3
; CHECK-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    xc 0(256,%r2), 0(%r2)
; CHECK-NEXT:    la %r2, 256(%r2)
; CHECK-NEXT:    brctg %r0, .LBB0_2
; CHECK-NEXT:  .LBB0_3:
; CHECK-NEXT:    exrl %r3, .Ltmp0
; CHECK-NEXT:    br %r14
  tail call void @llvm.memset.p0.i64(ptr %Addr, i8 0, i64 %Len, i1 false)
  ret void
}

; fun1: zero-fill %Addr for a run-time i32 length %Len.
; The expected assembly first zero-extends the 32-bit length from %r3 into
; %r1 with llgfr. After that it is the same shape as a 64-bit-length memset:
; aghi/cgibe to compute Len - 1 and return early on Len == 0, a brctg loop of
; 256-byte xc instructions, and a final exrl of the xc at .Ltmp0.
define void @fun1(ptr %Addr, i32 %Len) {
; CHECK-LABEL: fun1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    llgfr %r1, %r3
; CHECK-NEXT:    aghi %r1, -1
; CHECK-NEXT:    cgibe %r1, -1, 0(%r14)
; CHECK-NEXT:  .LBB1_1:
; CHECK-NEXT:    srlg %r0, %r1, 8
; CHECK-NEXT:    cgije %r0, 0, .LBB1_3
; CHECK-NEXT:  .LBB1_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    xc 0(256,%r2), 0(%r2)
; CHECK-NEXT:    la %r2, 256(%r2)
; CHECK-NEXT:    brctg %r0, .LBB1_2
; CHECK-NEXT:  .LBB1_3:
; CHECK-NEXT:    exrl %r1, .Ltmp0
; CHECK-NEXT:    br %r14
  tail call void @llvm.memset.p0.i32(ptr %Addr, i8 0, i32 %Len, i1 false)
  ret void
}

; Test that identical EXRL target instructions get reused.
; fun2 issues the same variable-length memset three times. In the expected
; assembly the first two expansions work on a copy of the pointer in %r3
; (lgr %r3, %r2) and both execute the xc at .Ltmp1. The last expansion uses
; %r2 directly and executes the xc at .Ltmp0, the same label the expected
; output of fun0 and fun1 refers to. The length is zero-extended and
; decremented once (llgfr/aghi) and %r1 is then shared by all three
; expansions.
define void @fun2(ptr %Addr, i32 %Len) {
; CHECK-LABEL: fun2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    llgfr %r1, %r3
; CHECK-NEXT:    aghi %r1, -1
; CHECK-NEXT:    cgije %r1, -1, .LBB2_4
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    srlg %r0, %r1, 8
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    cgije %r0, 0, .LBB2_3
; CHECK-NEXT:  .LBB2_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    xc 0(256,%r3), 0(%r3)
; CHECK-NEXT:    la %r3, 256(%r3)
; CHECK-NEXT:    brctg %r0, .LBB2_2
; CHECK-NEXT:  .LBB2_3:
; CHECK-NEXT:    exrl %r1, .Ltmp1
; CHECK-NEXT:  .LBB2_4:
; CHECK-NEXT:    cgije %r1, -1, .LBB2_8
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    srlg %r0, %r1, 8
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    cgije %r0, 0, .LBB2_7
; CHECK-NEXT:  .LBB2_6: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    xc 0(256,%r3), 0(%r3)
; CHECK-NEXT:    la %r3, 256(%r3)
; CHECK-NEXT:    brctg %r0, .LBB2_6
; CHECK-NEXT:  .LBB2_7:
; CHECK-NEXT:    exrl %r1, .Ltmp1
; CHECK-NEXT:  .LBB2_8:
; CHECK-NEXT:    cgibe %r1, -1, 0(%r14)
; CHECK-NEXT:  .LBB2_9:
; CHECK-NEXT:    srlg %r0, %r1, 8
; CHECK-NEXT:    cgije %r0, 0, .LBB2_11
; CHECK-NEXT:  .LBB2_10: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    xc 0(256,%r2), 0(%r2)
; CHECK-NEXT:    la %r2, 256(%r2)
; CHECK-NEXT:    brctg %r0, .LBB2_10
; CHECK-NEXT:  .LBB2_11:
; CHECK-NEXT:    exrl %r1, .Ltmp0
; CHECK-NEXT:    br %r14
  tail call void @llvm.memset.p0.i32(ptr %Addr, i8 0, i32 %Len, i1 false)
  tail call void @llvm.memset.p0.i32(ptr %Addr, i8 0, i32 %Len, i1 false)
  tail call void @llvm.memset.p0.i32(ptr %Addr, i8 0, i32 %Len, i1 false)
  ret void
}

; Test that a memset to nullptr compiles.
; The destination is the constant null pointer and only the i64 length is a
; parameter. In the expected assembly the null address is materialized in %r1
; with lghi %r1, 0, the 256-byte loop and the final exrl address memory
; through %r1, and the exrl target is the xc at .Ltmp2.
define void @fun3(i64 %Len) {
; CHECK-LABEL: fun3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r2, -1
; CHECK-NEXT:    cgibe %r2, -1, 0(%r14)
; CHECK-NEXT:  .LBB3_1:
; CHECK-NEXT:    srlg %r0, %r2, 8
; CHECK-NEXT:    lghi %r1, 0
; CHECK-NEXT:    cgije %r0, 0, .LBB3_3
; CHECK-NEXT:  .LBB3_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    xc 0(256,%r1), 0(%r1)
; CHECK-NEXT:    la %r1, 256(%r1)
; CHECK-NEXT:    brctg %r0, .LBB3_2
; CHECK-NEXT:  .LBB3_3:
; CHECK-NEXT:    exrl %r2, .Ltmp2
; CHECK-NEXT:    br %r14
  call void @llvm.memset.p0.i64(ptr null, i8 0, i64 %Len, i1 false)
  ret void
}

; Test that a memset whose length argument DAGCombiner will convert into a
; constant gets the correct number of bytes set.
;
; The length is written as a constant expression over addresses inside @Data:
;   (&Data[1][0] + 1) - (&Data[0][35] + 1) = 1024 - 35 = 989 bytes.
; The expected assembly clears exactly that many bytes starting at offset 35:
; three 256-byte xc instructions followed by one of 221 bytes
; (3 * 256 + 221 = 989).
;
; @Data is the 1024-byte external array that the length expressions in this
; test are computed against.
@Data = external hidden constant [1024 x i8], align 2
define void @fun4() {
; CHECK-LABEL: fun4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    larl %r1, Data
; CHECK-NEXT:    xc 35(256,%r1), 35(%r1)
; CHECK-NEXT:    xc 291(256,%r1), 291(%r1)
; CHECK-NEXT:    xc 547(256,%r1), 547(%r1)
; CHECK-NEXT:    xc 803(221,%r1), 803(%r1)
; CHECK-NEXT:    mvghi 0(%r1), 989
; CHECK-NEXT:    br %r14
  call void @llvm.memset.p0.i64(
     ptr getelementptr inbounds ([1024 x i8], ptr @Data, i64 0, i64 35),
     i8 0,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 0) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 0, i64 35) to i64), i64 1)),
     i1 false)
  ; The same length expression is used as an offset from null and stored; in
  ; the expected assembly it appears as the mvghi immediate (989).
  %i11 = getelementptr i8, ptr null,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 0) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 0, i64 35) to i64), i64 1))
  store ptr %i11, ptr undef, align 8
  ret void
}

; The same, with a resulting constant length of 0.
; Both operands of the subtraction are the identical expression
; (&Data[1][35] + 1), so the memset length is 0. The expected assembly
; contains no xc and no load of @Data's address: only the mvghi that stores
; the length expression (immediate 0) and the return.
define void @fun5() {
; CHECK-LABEL: fun5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mvghi 0(%r1), 0
; CHECK-NEXT:    br %r14
  call void @llvm.memset.p0.i64(
     ptr getelementptr inbounds ([1024 x i8], ptr @Data, i64 0, i64 35),
     i8 0,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1)),
     i1 false)
  %i11 = getelementptr i8, ptr null,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1))
  store ptr %i11, ptr undef, align 8
  ret void
}

; The same, with a resulting constant length of 1.
; The length expression is (&Data[1][36] + 1) - (&Data[1][35] + 1) = 1.
; The expected assembly is a single 1-byte xc at offset 35 of @Data, followed
; by the mvghi that stores the length expression (immediate 1).
define void @fun6() {
; CHECK-LABEL: fun6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    larl %r1, Data
; CHECK-NEXT:    xc 35(1,%r1), 35(%r1)
; CHECK-NEXT:    mvghi 0(%r1), 1
; CHECK-NEXT:    br %r14
  call void @llvm.memset.p0.i64(
     ptr getelementptr inbounds ([1024 x i8], ptr @Data, i64 0, i64 35),
     i8 0,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 36) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1)),
     i1 false)
  %i11 = getelementptr i8, ptr null,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 36) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1))
  store ptr %i11, ptr undef, align 8
  ret void
}

; The same, with a resulting constant length of 256.
; The length expression is (&Data[1][291] + 1) - (&Data[1][35] + 1) = 256.
; The expected assembly is exactly one 256-byte xc at offset 35 of @Data,
; followed by the mvghi that stores the length expression (immediate 256).
define void @fun7() {
; CHECK-LABEL: fun7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    larl %r1, Data
; CHECK-NEXT:    xc 35(256,%r1), 35(%r1)
; CHECK-NEXT:    mvghi 0(%r1), 256
; CHECK-NEXT:    br %r14
  call void @llvm.memset.p0.i64(
     ptr getelementptr inbounds ([1024 x i8], ptr @Data, i64 0, i64 35),
     i8 0,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 291) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1)),
     i1 false)
  %i11 = getelementptr i8, ptr null,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 291) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1))
  store ptr %i11, ptr undef, align 8
  ret void
}

; The same, with a resulting constant length of 257.
; The length expression is (&Data[1][292] + 1) - (&Data[1][35] + 1) = 257.
; The expected assembly is one 256-byte xc at offset 35 of @Data plus a 1-byte
; xc at offset 291 (35 + 256), followed by the mvghi that stores the length
; expression (immediate 257).
define void @fun8() {
; CHECK-LABEL: fun8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    larl %r1, Data
; CHECK-NEXT:    xc 35(256,%r1), 35(%r1)
; CHECK-NEXT:    xc 291(1,%r1), 291(%r1)
; CHECK-NEXT:    mvghi 0(%r1), 257
; CHECK-NEXT:    br %r14
  call void @llvm.memset.p0.i64(
     ptr getelementptr inbounds ([1024 x i8], ptr @Data, i64 0, i64 35),
     i8 0,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 292) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1)),
     i1 false)
  %i11 = getelementptr i8, ptr null,
     i64 sub (i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 292) to i64), i64 1),
              i64 add (i64 ptrtoint (ptr getelementptr inbounds ([1024 x i8],
                                     ptr @Data, i64 1, i64 35) to i64), i64 1))
  store ptr %i11, ptr undef, align 8
  ret void
}

; The out-of-line xc instructions that the exrl instructions in the functions
; above refer to. Exactly three are expected, one per base register used
; (%r1, %r2, %r3), each with a length field of 1 and in this order.
; CHECK:       .Ltmp2:
; CHECK-NEXT: 	 xc 0(1,%r1), 0(%r1)
; CHECK-NEXT:  .Ltmp0:
; CHECK-NEXT:    xc 0(1,%r2), 0(%r2)
; CHECK-NEXT:  .Ltmp1:
; CHECK-NEXT:    xc 0(1,%r3), 0(%r3)

; Declarations of the memset intrinsic for the two length types used in this
; file (i64 and i32). Operands: destination, fill byte, length, is-volatile.
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
